Lesson 06 - AI Programming - Python Rating Prediction

Dr Tran Anh Tuan (Head of AI in VTC Academy)

In [1]:
# Mount drive 
from google.colab import drive
drive.mount('/content/gdrive')
Mounted at /content/gdrive
In [2]:
import os
path_Data = "//content//gdrive//MyDrive//VTCE AI TUAN//Rating Prediction//Game//"
checkPath = os.path.isdir(path_Data)
checkFile = os.path.isfile(path_Data + "playerStats.csv")
print("The path and file are valid or not :", checkPath, checkFile)  
The path and file are valid or not : True True
In [6]:
import pandas as pd
import numpy as np
data  = pd.read_csv(path_Data + "playerStats.csv")
data.head(20)
Out[6]:
Map Player Kills Deaths ADR KAST% Rating MatchID
0 Nuke 4216 22 19 85.0 69.0 1.17 2311133/binary-dragons-vs-alpha-binary-dragons...
1 Nuke 3543 26 23 83.5 58.6 1.10 2311133/binary-dragons-vs-alpha-binary-dragons...
2 Nuke 3972 20 23 78.2 75.9 1.02 2311133/binary-dragons-vs-alpha-binary-dragons...
3 Nuke 2476 17 20 68.1 79.3 0.97 2311133/binary-dragons-vs-alpha-binary-dragons...
4 Nuke 2299 17 20 66.3 65.5 0.86 2311133/binary-dragons-vs-alpha-binary-dragons...
5 Nuke 11915 28 21 99.8 65.5 1.35 2311133/binary-dragons-vs-alpha-binary-dragons...
6 Nuke 12802 20 17 78.2 75.9 1.20 2311133/binary-dragons-vs-alpha-binary-dragons...
7 Nuke 7590 21 24 85.6 65.5 1.03 2311133/binary-dragons-vs-alpha-binary-dragons...
8 Nuke 2982 18 20 66.6 69.0 1.00 2311133/binary-dragons-vs-alpha-binary-dragons...
9 Nuke 9903 18 20 57.5 65.5 0.87 2311133/binary-dragons-vs-alpha-binary-dragons...
10 Cobblestone 2299 25 12 118.6 90.5 1.82 2311133/binary-dragons-vs-alpha-binary-dragons...
11 Cobblestone 3972 19 10 114.8 85.7 1.67 2311133/binary-dragons-vs-alpha-binary-dragons...
12 Cobblestone 4216 20 11 90.0 90.5 1.55 2311133/binary-dragons-vs-alpha-binary-dragons...
13 Cobblestone 3543 15 10 59.2 90.5 1.22 2311133/binary-dragons-vs-alpha-binary-dragons...
14 Cobblestone 2476 14 12 68.9 81.0 1.07 2311133/binary-dragons-vs-alpha-binary-dragons...
15 Cobblestone 11915 19 19 85.0 71.4 1.01 2311133/binary-dragons-vs-alpha-binary-dragons...
16 Cobblestone 9903 13 18 65.8 57.1 0.81 2311133/binary-dragons-vs-alpha-binary-dragons...
17 Cobblestone 7590 8 19 44.7 33.3 0.55 2311133/binary-dragons-vs-alpha-binary-dragons...
18 Cobblestone 2982 7 18 56.2 38.1 0.52 2311133/binary-dragons-vs-alpha-binary-dragons...
19 Cobblestone 12802 8 19 54.2 52.4 0.50 2311133/binary-dragons-vs-alpha-binary-dragons...
In [4]:
import matplotlib.pyplot as plt
import math
from IPython.core.debugger import Tracer;
In [23]:
# Average damage per round : ADR
dataSet = data.loc[:,["ADR", "Rating"]] 
dataSet['Rating'] = dataSet['Rating']*100
display(dataSet.head())
ADR Rating
0 85.0 117.0
1 83.5 110.0
2 78.2 102.0
3 68.1 97.0
4 66.3 86.0
In [25]:
X= np.array(dataSet['ADR'])
y = np.array(dataSet['Rating'])
plt.scatter(X,y)
plt.xlabel('ADR')
plt.ylabel('Rating')
plt.grid()
plt.show()
In [26]:
def SSE(m,b,data):
    totalError=0.0
    totalNan = 0
    for i in range(data.shape[0]):
        if(math.isnan(data[i,0])):
            totalNan +=1
        else:
            yOutput = m*data[i,0]+b
            y = data[i,1]
            error = (y-yOutput)**2
            totalError =totalError+ error
    return totalError
In [27]:
m = 3
b = 2
plt.scatter(X,y)
plt.plot(X,m*X+b,color='red')
plt.show()
In [29]:
data = np.array(dataSet)
data
data[1,0]
Out[29]:
83.5
In [30]:
sse = SSE(m,b,data)
print('For the fitting line: y = %sx + %s\nSSE: %.2f' %(m,b,sse))
For the fitting line: y = 3x + 2
SSE: 2917099397.30
In [31]:
def gradient_descent_step(m,b,data):
    n_points = data.shape[0] #size of data
    m_grad = 0
    b_grad = 0
    stepper = 0.0001 #this is the learning rate
    for i in range(n_points):
        #Get current pair (x,y)
        x = data[i,0]
        y = data[i,1]
        if(math.isnan(x)|math.isnan(y)): #it will prevent for crashing when some data is missing
            #print("is nan")
            continue
        #you will calculate the partical derivative for each value in data
        #Partial derivative respect 'm'
        dm = -((2/n_points) * x * (y - (m*x + b)))
        #Partial derivative respect 'b'
        db = - ((2/n_points) * (y - (m*x + b)))
        #Update gradient
        m_grad = m_grad + dm
        b_grad = b_grad + db
    #Set the new 'better' updated 'm' and 'b'   
    m_updated = m - stepper*m_grad
    b_updated = b - stepper*b_grad
    #print('m ', m)
    ##print('steepr*gradient ',stepper*m_grad)
    #print('m_updated', m_updated)
    '''
    Important note: The value '0.0001' that multiplies the 'm_grad' and 'b_grad' is the 'learning rate', but it's a concept
    out of the scope of this challenge. For now, just leave that there and think about it like a 'smoother' of the learn, 
    to prevent overshooting, that is, an extremly fast and uncontrolled learning.
    '''
    return m_updated,b_updated
In [35]:
m = 3
b = 2
print('Starting line: y = %.2fx + %.2f - Error: %.2f' %(m,b,sse))
for i in range(50): #Should Try 10000
    m,b = gradient_descent_step(m,b,data)
    sse = SSE(m,b,data)
    print('At step %d - Line: y = %.2fx + %.2f - Error: %.2f' %(i+1,m,b,sse))
print('\nBest  line: y = %.2fx + %.2f - Error: %.2f' %(m,b,sse))
Starting line: y = 3.00x + 2.00 - Error: 55164820.49
At step 1 - Line: y = 1.04x + 1.98 - Error: 192438166.05
At step 2 - Line: y = 1.47x + 1.98 - Error: 61750014.76
At step 3 - Line: y = 1.38x + 1.98 - Error: 55481490.65
At step 4 - Line: y = 1.40x + 1.98 - Error: 55180739.97
At step 5 - Line: y = 1.39x + 1.98 - Error: 55166232.87
At step 6 - Line: y = 1.39x + 1.98 - Error: 55165455.42
At step 7 - Line: y = 1.39x + 1.98 - Error: 55165336.51
At step 8 - Line: y = 1.39x + 1.98 - Error: 55165249.19
At step 9 - Line: y = 1.39x + 1.98 - Error: 55165163.38
At step 10 - Line: y = 1.39x + 1.98 - Error: 55165077.65
At step 11 - Line: y = 1.39x + 1.98 - Error: 55164991.93
At step 12 - Line: y = 1.39x + 1.98 - Error: 55164906.21
At step 13 - Line: y = 1.39x + 1.98 - Error: 55164820.49
At step 14 - Line: y = 1.39x + 1.98 - Error: 55164734.77
At step 15 - Line: y = 1.39x + 1.98 - Error: 55164649.06
At step 16 - Line: y = 1.39x + 1.98 - Error: 55164563.34
At step 17 - Line: y = 1.39x + 1.98 - Error: 55164477.63
At step 18 - Line: y = 1.39x + 1.98 - Error: 55164391.92
At step 19 - Line: y = 1.39x + 1.98 - Error: 55164306.22
At step 20 - Line: y = 1.39x + 1.98 - Error: 55164220.51
At step 21 - Line: y = 1.39x + 1.98 - Error: 55164134.81
At step 22 - Line: y = 1.39x + 1.98 - Error: 55164049.11
At step 23 - Line: y = 1.39x + 1.98 - Error: 55163963.41
At step 24 - Line: y = 1.39x + 1.97 - Error: 55163877.71
At step 25 - Line: y = 1.39x + 1.97 - Error: 55163792.02
At step 26 - Line: y = 1.39x + 1.97 - Error: 55163706.33
At step 27 - Line: y = 1.39x + 1.97 - Error: 55163620.64
At step 28 - Line: y = 1.39x + 1.97 - Error: 55163534.95
At step 29 - Line: y = 1.39x + 1.97 - Error: 55163449.26
At step 30 - Line: y = 1.39x + 1.97 - Error: 55163363.58
At step 31 - Line: y = 1.39x + 1.97 - Error: 55163277.89
At step 32 - Line: y = 1.39x + 1.97 - Error: 55163192.21
At step 33 - Line: y = 1.39x + 1.97 - Error: 55163106.54
At step 34 - Line: y = 1.39x + 1.97 - Error: 55163020.86
At step 35 - Line: y = 1.39x + 1.97 - Error: 55162935.19
At step 36 - Line: y = 1.39x + 1.97 - Error: 55162849.51
At step 37 - Line: y = 1.39x + 1.97 - Error: 55162763.85
At step 38 - Line: y = 1.39x + 1.97 - Error: 55162678.18
At step 39 - Line: y = 1.39x + 1.97 - Error: 55162592.51
At step 40 - Line: y = 1.39x + 1.97 - Error: 55162506.85
At step 41 - Line: y = 1.39x + 1.97 - Error: 55162421.19
At step 42 - Line: y = 1.39x + 1.97 - Error: 55162335.53
At step 43 - Line: y = 1.39x + 1.97 - Error: 55162249.87
At step 44 - Line: y = 1.39x + 1.97 - Error: 55162164.21
At step 45 - Line: y = 1.39x + 1.97 - Error: 55162078.56
At step 46 - Line: y = 1.39x + 1.97 - Error: 55161992.91
At step 47 - Line: y = 1.39x + 1.97 - Error: 55161907.26
At step 48 - Line: y = 1.39x + 1.97 - Error: 55161821.61
At step 49 - Line: y = 1.39x + 1.97 - Error: 55161735.97
At step 50 - Line: y = 1.39x + 1.97 - Error: 55161650.33

Best  line: y = 1.39x + 1.97 - Error: 55161650.33
In [38]:
print ('m ', m)
print('b ', b)
plt.scatter(X,y)
plt.plot(X,m*X+b,color='red')
plt.plot(X,3*X+2,color='green')
plt.show()
m  1.3923305664859575
b  1.9691739421715928

House Price Prediction

Sale Price and Living Area

In [39]:
import os
path_Data = "//content//gdrive//MyDrive//VTCE AI TUAN//House Price Prediction//"
checkPath = os.path.isdir(path_Data)
checkFile = os.path.isfile(path_Data + "house-prices 2.csv")
print("The path and file are valid or not :", checkPath, checkFile)  
The path and file are valid or not : True True
In [40]:
#Load the libraries and data...
import matplotlib.pyplot as plt
import matplotlib.animation as animation

data = pd.read_csv(path_Data + "house-prices 2.csv")
display(data.head())
Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape LandContour Utilities LotConfig LandSlope Neighborhood Condition1 Condition2 BldgType HouseStyle OverallQual OverallCond YearBuilt YearRemodAdd RoofStyle RoofMatl Exterior1st Exterior2nd MasVnrType MasVnrArea ExterQual ExterCond Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinSF1 BsmtFinType2 BsmtFinSF2 BsmtUnfSF TotalBsmtSF Heating ... CentralAir Electrical 1stFlrSF 2ndFlrSF LowQualFinSF GrLivArea BsmtFullBath BsmtHalfBath FullBath HalfBath BedroomAbvGr KitchenAbvGr KitchenQual TotRmsAbvGrd Functional Fireplaces FireplaceQu GarageType GarageYrBlt GarageFinish GarageCars GarageArea GarageQual GarageCond PavedDrive WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch PoolArea PoolQC Fence MiscFeature MiscVal MoSold YrSold SaleType SaleCondition SalePrice
0 1 60 RL 65.0 8450 Pave NaN Reg Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story 7 5 2003 2003 Gable CompShg VinylSd VinylSd BrkFace 196.0 Gd TA PConc Gd TA No GLQ 706 Unf 0 150 856 GasA ... Y SBrkr 856 854 0 1710 1 0 2 1 3 1 Gd 8 Typ 0 NaN Attchd 2003.0 RFn 2 548 TA TA Y 0 61 0 0 0 0 NaN NaN NaN 0 2 2008 WD Normal 208500
1 2 20 RL 80.0 9600 Pave NaN Reg Lvl AllPub FR2 Gtl Veenker Feedr Norm 1Fam 1Story 6 8 1976 1976 Gable CompShg MetalSd MetalSd None 0.0 TA TA CBlock Gd TA Gd ALQ 978 Unf 0 284 1262 GasA ... Y SBrkr 1262 0 0 1262 0 1 2 0 3 1 TA 6 Typ 1 TA Attchd 1976.0 RFn 2 460 TA TA Y 298 0 0 0 0 0 NaN NaN NaN 0 5 2007 WD Normal 181500
2 3 60 RL 68.0 11250 Pave NaN IR1 Lvl AllPub Inside Gtl CollgCr Norm Norm 1Fam 2Story 7 5 2001 2002 Gable CompShg VinylSd VinylSd BrkFace 162.0 Gd TA PConc Gd TA Mn GLQ 486 Unf 0 434 920 GasA ... Y SBrkr 920 866 0 1786 1 0 2 1 3 1 Gd 6 Typ 1 TA Attchd 2001.0 RFn 2 608 TA TA Y 0 42 0 0 0 0 NaN NaN NaN 0 9 2008 WD Normal 223500
3 4 70 RL 60.0 9550 Pave NaN IR1 Lvl AllPub Corner Gtl Crawfor Norm Norm 1Fam 2Story 7 5 1915 1970 Gable CompShg Wd Sdng Wd Shng None 0.0 TA TA BrkTil TA Gd No ALQ 216 Unf 0 540 756 GasA ... Y SBrkr 961 756 0 1717 1 0 1 0 3 1 Gd 7 Typ 1 Gd Detchd 1998.0 Unf 3 642 TA TA Y 0 35 272 0 0 0 NaN NaN NaN 0 2 2006 WD Abnorml 140000
4 5 60 RL 84.0 14260 Pave NaN IR1 Lvl AllPub FR2 Gtl NoRidge Norm Norm 1Fam 2Story 8 5 2000 2000 Gable CompShg VinylSd VinylSd BrkFace 350.0 Gd TA PConc Gd TA Av GLQ 655 Unf 0 490 1145 GasA ... Y SBrkr 1145 1053 0 2198 1 0 2 1 4 1 Gd 9 Typ 1 TA Attchd 2000.0 RFn 3 836 TA TA Y 192 84 0 0 0 0 NaN NaN NaN 0 12 2008 WD Normal 250000

5 rows × 81 columns

In [41]:
x = data['GrLivArea']
y = data['SalePrice']

x = (x - x.mean()) / x.std()
x = np.c_[np.ones(x.shape[0]), x] 
In [42]:
#GRADIENT DESCENT

alpha = 0.01 #Step size
iterations = 2000 #No. of iterations
m = y.size #No. of data points
np.random.seed(123) #Set the seed
theta = np.random.rand(2) #Pick some random values to start with

#GRADIENT DESCENT
def gradient_descent(x, y, theta, iterations, alpha):
    past_costs = []
    past_thetas = [theta]
    for i in range(iterations):
        prediction = np.dot(x, theta)
        error = prediction - y
        cost = 1/(2*m) * np.dot(error.T, error)
        past_costs.append(cost)
        theta = theta - (alpha * (1/m) * np.dot(x.T, error))
        past_thetas.append(theta)
    return past_thetas, past_costs

#Pass the relevant variables to the function and get the new values back...
past_thetas, past_costs = gradient_descent(x, y, theta, iterations, alpha)
theta = past_thetas[-1]

#Print the results...
print("Gradient Descent: {:.2f}, {:.2f}".format(theta[0], theta[1]))
Gradient Descent: 180921.20, 56294.90
In [43]:
#Plot the cost function...
plt.title('Cost Function J')
plt.xlabel('No. of iterations')
plt.ylabel('Cost')
plt.plot(past_costs)
plt.show()
In [53]:
#Animation

#Set the plot up,
fig = plt.figure()
ax = plt.axes()
plt.title('Sale Price vs Living Area')
plt.xlabel('Living Area in square feet (normalised)')
plt.ylabel('Sale Price ($)')
plt.scatter(x[:,1], y, color='red')
line, = ax.plot([], [], lw=2)
annotation = ax.text(-1, 700000, '')
annotation.set_animated(True)
plt.close()

#Generate the animation data,
def init():
    line.set_data([], [])
    annotation.set_text('')
    return line, annotation

# animation function.  This is called sequentially
def animate(i):
    x = np.linspace(-5, 20, 1000)
    y = past_thetas[i][1]*x + past_thetas[i][0]
    line.set_data(x, y)
    annotation.set_text('Cost = %.2f e10' % (past_costs[i]/10000000000))
    return line, annotation

anim = animation.FuncAnimation(fig, animate, init_func=init,
                               frames=300, interval=0, blit=True)
anim.save('animation.gif', writer='pillow', fps = 30)
In [54]:
#Display the animation...
import io
import base64
from IPython.display import HTML

filename = 'animation.gif'

video = io.open(filename, 'r+b').read()
encoded = base64.b64encode(video)
HTML(data='''<img src="data:image/gif;base64,{0}" type="gif" />'''.format(encoded.decode('ascii')))
Out[54]: